
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

source(paste0(working_dir,"Auxiliary Scripts/RegressionRunner.R"))

##load in the main datasets
all_together <- readRDS(paste0(working_dir,"Data/contemporary_social_capital_data_with_news.RDS"))
rel_growth <- readRDS(paste0(working_dir,"Data/religionEarly20thCen.RDS"))


##prepare the regressions by defining the names of the dependent variables
varnames <- c("AllOrgs","Putnam","pvote2012","nccs2014",
              "DRUGTOT", "MURDER", "RAPE", "GRNDTOT",
              "Mortality", "Premature mortality rate",
              "% without health insurance"
)

varnames_rev <- c("Num. Social Orgs","Num. Civic Orgs","Voter Turnout (2012)","Num. Nonprofits",
                  "Drug Arrest Rate", "Murder Arrest Rate", "Rape Arrest Rate", "All Arrest Rate",
                  "Mortality Rate", "Premature Mortality Rate",
                  "% without Health Insurance"
)


##run the regressions for both the pretreatment and post treatment cases
pretreat_only <- as.data.frame(do.call(rbind, lapply(varnames, reg_runner_rev, covars = "pretreat", dataset = all_together)))
all_vars <- as.data.frame(do.call(rbind, lapply(varnames, reg_runner_rev, covars = "all", dataset = all_together)))

pretreat_only$Covariates <- "1890 Covariates Only"
all_vars$Covariates <- "1890 Covariates +\nContemporary Controls"

##bind together the results
all_est <- rbind(pretreat_only, all_vars)
all_est$DV <- as.factor(rep(varnames_rev, 2))


# ##run the church adherence regressions
# mod1906 <- lm(log((AllAdherents1906 + 1)/(pop1906 + 1)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
#                 factor(state), data = rel_growth)
# mod1916 <- lm(log((AllAdherents1916 + 1)/(pop1916 + 1)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
#                 factor(state), data = rel_growth)
# mod1926 <- lm(log((AllAdherents1926 + 1)/(pop1926 + 1)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
#                 factor(state), data = rel_growth)
# mod1936 <- lm(log((AllAdherents1936 + 1)/(pop1936 + 1)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
#                 factor(state), data = rel_growth)
# 
# AdherentRegs <- data.frame("Estimate" = c(coef(mod1906)[2], coef(mod1916)[2], coef(mod1926)[2], coef(mod1936)[2]),
#                            "`Std. Error`" = c(summary(mod1906)$coef[2,2], summary(mod1916)$coef[2,2], summary(mod1926)$coef[2,2], summary(mod1936)$coef[2,2]),
#                            "Covariates" = "1890 Covariates Only",
#                            "DV" = c("Religious Adherents p.c. (1906)", "Religious Adherents p.c. (1916)",
#                                     "Religious Adherents p.c. (1926)","Religious Adherents p.c. (1936)"))
# 
# colnames(AdherentRegs)[2] <- "Std. Error"


##run the newspaper regressions (contemporary)
news_model1 <- lm(scale(log(all_papers + 1)) ~ log(numPost + 1) + log(tot_pop/area) + 
                    sh_black_pop + sh_foreign_born_wh_1890 + 
                    MajPartyVote_1884 + IncumbPartyVote_1884 + MajPartyVote_1888 + IncumbPartyVote_1888 +
                    factor(state), data = all_together)

news_model2 <- lm(scale(log(all_papers + 1)) ~ log(numPost + 1) + log(tot_pop/area) + 
                    sh_black_pop + sh_foreign_born_wh_1890 + 
                    MajPartyVote_1884 + IncumbPartyVote_1884 + MajPartyVote_1888 + IncumbPartyVote_1888 +
                    log(IncPC_2010) + I(`% non-Hispanic white`) +
                    I(`Gini coefficient`) + I(`Poverty rate`) + I(`% adults with BA`) +
                    factor(state), data = all_together)


newsies <- data.frame("Estimate" = c(coef(news_model1)[2], coef(news_model2)[2]),
                      "Std. Error" = c(summary(news_model1)$coef[2,2], summary(news_model1)$coef[2,2]),
                      "Covariates" = c("1890 Covariates Only","1890 Covariates +\nContemporary Controls"),
                      "DV" = rep("Num. Newspapers (2000)", 2))
colnames(newsies)[2] <- "Std. Error"


all_est2 <- rbind(all_est, newsies)
# AdherentRegs$OutcomeType <- "Near-Term Outcome Variables"
# all_est2$OutcomeType <- "Distal Outcomes"

##make coefficient plots
sc_plots_rev <-
  # rbind(AdherentRegs,all_est2) %>%
  all_est2 %>%
  # mutate(OutcomeType_rev = relevel(as.factor(OutcomeType), ref = "Near-Term Outcome Variables")) %>%
  filter(DV != "Num. Olson Orgs") %>%
  ggplot(aes(reorder(DV,Estimate), Estimate, colour = Covariates)) +
  geom_point(size = 5, position=position_dodge(width=0.5)) +
  geom_errorbar(
    aes(ymin = Estimate - 1.65*`Std. Error`, ymax = Estimate + 1.65*`Std. Error`),
    width = 0.01,
    linetype = "solid",
    position=position_dodge(width=0.5)) +
  xlab("") +
  ylab("Coefficient on Post Offices (logged, 1890)") +
  geom_hline(yintercept = 0, linetype = "dotted") +
  coord_flip() + 
  theme_bw() +
  theme(legend.position = "bottom", 
        legend.title = element_text(size = 25),
        axis.text.x = element_text(size = 25), axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 25),
        axis.title.x = element_text(size = 25),
        axis.title.y = element_text(size = 25),
        strip.text.x = element_text(size = 25)) +
  # facet_wrap(~ OutcomeType_rev, ncol = 1, scales = "free") +
  ylim(-0.5, 0.5) + 
  theme(legend.position="bottom") +
  labs(colour = "")

ggsave(sc_plots_rev, 
       file = paste0(working_dir,"Replication Plots/social_capital_plots_gini_control.pdf"), 
       width = 13, height = 7)



